# Load the MC2 challenge graph.
# The JSON source is parsed only when no cached RDS copy exists; the parsed
# object is then saved so that later runs can read the RDS file instead.
# Note: the cache is keyed on file existence only, so a changed JSON file is
# not picked up until data/mc2.rds is deleted.
json_file_path <- "data/mc2_challenge_graph.json"
mc2_file_path <- "data/mc2.rds"
if (!file.exists(mc2_file_path)) {
  mc2 <- fromJSON(json_file_path)
  saveRDS(mc2, mc2_file_path)
} else {
  mc2 <- readRDS(mc2_file_path)
}
# Take-home Exercise 02
# Node table: keep only the id and the two country columns.
mc2_nodes <- mc2$nodes %>%
  as_tibble() %>%
  select(id, shpcountry, rcvcountry)

# Edge table: parse `arrivaldate` into `ArrivalDate`, derive `Year` from it,
# keep the columns used downstream and drop exact duplicate rows.
mc2_edges <- mc2$links %>%
  as_tibble() %>%
  mutate(
    ArrivalDate = ymd(arrivaldate),
    Year = year(ArrivalDate)
  ) %>%
  select(
    source, target,
    ArrivalDate, Year,
    hscode,
    valueofgoods_omu, volumeteu, weightkg, valueofgoodsusd
  ) %>%
  distinct()
# Group each edge by the first character of its HS code.
mc2_edges$grp_hscode <- substr(mc2_edges$hscode, 1, 1)

# Lookup table of every company that appears as a source or a target:
# `name` is the sorted company name and `cid` its position in that ordering.
# union() already removes duplicates, so no separate unique() is needed.
ids <- union(mc2_edges$source, mc2_edges$target) %>%
  sort() %>%
  as_tibble()
colnames(ids) <- "name"
ids <- ids %>% mutate(cid = row_number())
# Treat the company id as a categorical label rather than a number.
ids$cid <- factor(ids$cid)

# Aggregate individual shipments into one edge per
# (source, target, HS-code group, year), then keep only edges between two
# different companies that carry more than 20 trades.
# `.groups = "drop"` returns an ungrouped tibble without the grouping message.
# NOTE(review): sum(weightkg) is NA for any group containing a missing
# weight -- confirm whether na.rm = TRUE is wanted.
mc2_edges_agg <- mc2_edges %>%
  group_by(source, target, grp_hscode, Year) %>%
  summarise(
    num_trades = n(),
    total_weightkg = sum(weightkg),
    .groups = "drop"
  ) %>%
  filter(source != target, num_trades > 20)

# Source endpoints of the retained edges, under the common column name `id`.
id1 <- mc2_edges_agg %>%
  select(id = source)
# Target endpoints of the retained edges, under the common column name `id`.
id2 <- mc2_edges_agg %>%
  select(id = target)

# Every company that still takes part in at least one aggregated edge,
# listed once and joined to its `cid` from the `ids` lookup table.
mc2_nodes_extracted <- rbind(id1, id2) %>%
  distinct() %>%
  merge(ids, by.x = "id", by.y = "name")
# id1 / id2 were only needed to assemble mc2_nodes_extracted.
rm(id1, id2)

# Directed graph over all aggregated edges, with two node-level scores.
# Both scores are weighted by `num_trades`, an edge column.
# NOTE(review): the nodes have no `name` column, so the character
# source/target values are presumably matched against the first node column
# (`id`) -- confirm against the tidygraph version in use.
# NOTE(review): igraph treats betweenness weights as path lengths, so
# heavily traded links count as "longer" -- confirm this is intended.
mc2_graph <- tbl_graph(
  nodes = mc2_nodes_extracted,
  edges = mc2_edges_agg,
  directed = TRUE
) %>%
  activate(nodes) %>%
  mutate(
    betweenness_centrality = centrality_betweenness(weights = num_trades)
  ) %>%
  mutate(
    outdegree_centrality = centrality_degree(
      weights = num_trades,
      mode = "out"
    )
  )

# Years to plot; consumed by the loop that follows.
years <- c("2028")
# Build one sub-graph and one interactive plot per entry of `years`.
# Each iteration creates two objects in the current environment:
#   mc2_graph_<year> : edges of that year, isolated nodes removed,
#                      centrality scores recomputed on the sub-graph
#   g_<year>         : the ggraph plot of that sub-graph
# The loop variable is `yr` rather than `y` so it cannot be confused with
# the `y` layout column referenced in aes() below.
for (yr in years) {
  graph_name <- paste("mc2", "graph", yr, sep = "_")
  plot_name <- paste("g", yr, sep = "_")

  # NOTE(review): `Year` looks numeric while `years` holds strings; the
  # comparison works because R coerces both sides to character.
  year_graph <- mc2_graph %>%
    activate(edges) %>%
    filter(Year == yr) %>%
    activate(nodes) %>%
    filter(!node_is_isolated()) %>%
    mutate(
      betweenness_centrality = centrality_betweenness(weights = num_trades)
    ) %>%
    mutate(
      outdegree_centrality = centrality_degree(
        weights = num_trades,
        mode = "out"
      )
    )
  assign(graph_name, year_graph)

  year_plot <- ggraph(year_graph, layout = "nicely") +
    geom_edge_link(
      aes(width = num_trades, color = grp_hscode),
      alpha = 0.6
    ) +
    # NOTE(review): edge width encodes `num_trades`, yet the legend is titled
    # "Total weight" -- confirm the intended title (or mapping).
    scale_edge_width(range = c(0.4, 4), name = "Total weight") +
    scale_edge_color_brewer(name = "HS code group", palette = "Set1") +
    geom_point_interactive(
      aes(
        x = x,
        y = y,
        tooltip = paste0(
          "Name: ", id,
          "\nCompany ID: ", cid,
          "\nOut-degree: ", outdegree_centrality,
          "\nBetweenness: ", betweenness_centrality
        ),
        # All nodes sharing the same out-degree flag share one data_id.
        data_id = outdegree_centrality > 0,
        size = betweenness_centrality,
        fill = outdegree_centrality > 0
      ),
      colour = "grey20",
      shape = 21,
      alpha = 0.8
    ) +
    # NOTE(review): labels and values are matched by position (FALSE, TRUE);
    # this assumes both values occur among the plotted nodes.
    scale_fill_manual(
      labels = c("Zero", "Non-zero"),
      values = c("cyan", "firebrick1"),
      name = "Out-degree"
    ) +
    scale_size_continuous(range = c(1, 10), name = "Betweenness") +
    theme_graph(foreground = "grey20") +
    labs(title = yr) +
    theme(plot.title = element_text(size = 11))
  assign(plot_name, year_plot)
}
# Drop the loop temporaries; only mc2_graph_<year> and g_<year> remain.
rm(yr, years, graph_name, plot_name, year_graph, year_plot)